*Final Regression Data Prep

********************************************************************************
*generate market level weighted average TICKET prices

use regressionsdatasetwithIVs, clear

gen weightpassmeanmktfare_mkt= passmeanmktfare_mkt*mktpass_tkopcarrier
bysort tkcarrier market year quarter: egen sumweightpassmeanmktfare_mkt=sum(weightpassmeanmktfare_mkt)
bysort tkcarrier market year quarter: egen summktpass_tkopcarrier =sum(mktpass_tkopcarrier)
ge weightedmeanfare= sumweightpassmeanmktfare_mkt/summktpass_tkopcarrier

keep weightedmeanfare tkcarrier market year quarter

duplicates drop

sort tkcarrier year quarter

save ticketprice, replace

********************************************************************************
*merge the files

use regressionsdatasetwithIVs, clear

*merge in carrier counts and weighted prices
sort tkcarrier year quarter market
merge m:1 tkcarrier year quarter market using ticketprice.dta
drop if _merge == 2
drop _merge
erase ticketprice.dta

********************************************************************************
* prepare varables

gen logprice = log(weightedmeanfare)

encode market, gen(marketnum)

*Add dummies for carriers
tab tkcarrier, gen(dummy_tkcarrier)

*add dummies for years
tab year, gen(dummy_year)

*add dummies if market served by single firm
gen monopoly = 0
replace monopoly =1 if missing(mmc_ek) 

*replace monopoly observations with 0's
replace mmc_ek = 0 if monopoly == 1
replace csc_avg = 0 if monopoly == 1

********************************************************************************
*gen tkcarrier-market-time dummy variable for independent regional usage
bysort tkcarrier market year quarter: egen onenotowned_market = max(onenotowned)

********************************************************************************
*calculate percentage of passenger-miles in each market-major-year transported by regionals

sort market year

gen passenger_miles1 = mktpass_tkopcarrier*sgmdistance1
gen passenger_miles2 = mktpass_tkopcarrier*sgmdistance2
gen passenger_miles3 = mktpass_tkopcarrier*sgmdistance3

gen passenger_miles1_owned = 0
replace passenger_miles1_owned = passenger_miles1 if owned_regional1 == 1
gen passenger_miles2_owned = 0
replace passenger_miles2_owned = passenger_miles2 if owned_regional2 == 1
gen passenger_miles3_owned = 0
replace passenger_miles3_owned = passenger_miles3 if owned_regional3 == 1

gen passenger_miles1_ind = 0
replace passenger_miles1_ind = passenger_miles1 if unowned_regional1 == 1
gen passenger_miles2_ind = 0
replace passenger_miles2_ind = passenger_miles2 if unowned_regional2 == 1
gen passenger_miles3_ind = 0
replace passenger_miles3_ind = passenger_miles3 if unowned_regional3 == 1

gen total_route_pass_miles = passenger_miles1+passenger_miles2+passenger_miles3
gen total_route_pass_miles_own = passenger_miles1_owned+passenger_miles2_owned+passenger_miles3_owned
gen total_route_pass_miles_ind = passenger_miles1_ind + passenger_miles2_ind + passenger_miles3_ind

gen pct_major_ind_chain=total_route_pass_miles_ind/total_route_pass_miles
gen pct_major_own_chain=total_route_pass_miles_own/total_route_pass_miles

********************************************************************************
*bring to major-mkt level
egen mktktlevl_airportinUS_reg=rsum(airportsinUS_REG1 airportsinUS_REG2 airportsinUS_REG3)
bysort market year tkcarrier: egen Mmktktlevl_airportinUS_reg=mean(mktktlevl_airportinUS_reg)
drop mktktlevl_airportinUS_reg

egen mktktlevl_airptntwrkorigin_reg=rmean(airportnetworkorigin_REG1 airportnetworkorigin_REG2 airportnetworkorigin_REG3)
egen mktktlevl_airptntwrkdest_reg=rmean(airportnetworkdest_REG1 airportnetworkdest_REG2 airportnetworkdest_REG3)
drop airportsinUS_REG1 airportsinUS_REG2 airportsinUS_REG3 origin1-airportnetworkdest_REG3

bysort market year tkcarrier: egen Mmktktlevl_airptntwrkorigin_reg=mean(mktktlevl_airptntwrkorigin_reg)
bysort market year tkcarrier: egen Mmktktlevl_airptntwrkdest_reg=mean(mktktlevl_airptntwrkdest_reg)
drop mktktlevl_airptntwrkdest_reg mktktlevl_airptntwrkorigin_reg

bysort market year tkcarrier: egen total_major_pass_miles = sum(total_route_pass_miles) 
bysort market year tkcarrier: egen total_major_pass_miles_own = sum(total_route_pass_miles_own)
bysort market year tkcarrier: egen total_major_pass_miles_ind = sum(total_route_pass_miles_ind)

gen pct_major_own = total_major_pass_miles_own/total_major_pass_miles
gen pct_major_ind = total_major_pass_miles_ind/total_major_pass_miles

bysort market year tkcarrier: egen m_worstprcp = max(worstprcp)
bysort market year tkcarrier: egen m_worstsnow = max(worstsnow)
bysort market year tkcarrier: egen m_worstsnwd = max(worstsnwd)
bysort market year tkcarrier: egen m_worsttmin = min(worsttmin)

********************************************************************************
* using distances
egen minsegmdistance=rmin(sgmdistance1 sgmdistance2 sgmdistance3)
bysort tkcarrier market year quarter: egen Mminsegmentdistance=min(minsegmdistance)

bysort tkcarrier market year quarter: gen numbercombos=_N

********************************************************************************
* select variables if moving to major-market level
keep tkcarrier logprice onenotowned_market dummy* csc_avg csc_count_avg csc_weighted_avg mmc_ek marketnum monopoly year quarter market markets_served_origin_TKT* markets_served_dest_TKT* potential* numbercombos pct_major_ind Mmktktlevl_airptntwrkdest_reg Mmktktlevl_airptntwrkorigin_reg Mmktktlevl_airportinUS_reg numbercombos Mminsegmentdistance m_worstprcp m_worstsnow m_worstsnwd m_worsttmin
duplicates drop

********************************************************************************
* market networks of the competitors:
bysort year quarter market: egen IVmkts_served_origin_TKT=sum(markets_served_origin_TKT)
replace IVmkts_served_origin_TKT=IVmkts_served_origin_TKT-markets_served_origin_TKT
bysort year quarter market: egen IVmkts_served_dest_TKT=sum(markets_served_dest_TKT)
replace IVmkts_served_dest_TKT=IVmkts_served_dest_TKT-markets_served_dest_TKT

bysort market year quarter: gen num_comp = _N
gen IV_mkts_served_origin_AVG = IVmkts_served_origin_TKT/(num_comp-1)
gen IV_mkts_served_dest_AVG = IVmkts_served_dest_TKT/(num_comp-1)

* regional airports of competitors
bysort year quarter market: egen IVmktktlevl_airportinUS_reg=sum(Mmktktlevl_airportinUS_reg)
replace IVmktktlevl_airportinUS_reg=IVmktktlevl_airportinUS_reg-Mmktktlevl_airportinUS_reg

bysort year quarter market: egen IVMminsegmentdistance=sum(Mminsegmentdistance)
replace IVMminsegmentdistance=IVMminsegmentdistance-Mminsegmentdistance
bysort year quarter market: egen IVnumbercombos=sum(numbercombos)
replace IVnumbercombos=IVnumbercombos-numbercombos

foreach var of varlist m_worstprcp m_worstsnow m_worstsnwd m_worsttmin {
       bysort year quarter market: egen IV`var'=sum(`var')
	   replace IV`var'=IV`var'-`var'
}

********************************************************************************
*merge in regional HHI measure
sort market year quarter
merge m:1 market year quarter using market_level_regional_hhi_data
drop _merge

********************************************************************************
*Set up panel data, market level fixed effects
xtset marketnum

*Generate trend variables
bysort year: gen trend=_n==1
replace trend=sum(trend)
gen mmc_ek_trend=mmc_ek*trend
gen csc_avg_trend=csc_avg*trend
gen pct_major_trend = pct_major_ind*trend

gen csc_count_avg_trend = csc_count_avg*trend

gen csc_weighted_avg_trend = csc_weighted_avg*trend

save finaldatasetforregressions.dta, replace